library(here)
## here() starts at /Users/Alvin/Documents/NCSU_Fall_2021/TheGreenChairProject/the-green-chair-project
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.6 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.4 ✓ stringr 1.4.0
## ✓ readr 2.1.1 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(choroplethr)
## Loading required package: acs
## Loading required package: XML
##
## Attaching package: 'acs'
## The following object is masked from 'package:dplyr':
##
## combine
## The following object is masked from 'package:base':
##
## apply
library(choroplethrZip)
library(choroplethrMaps)
library(usmap)
library(ggmap)
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
tgcp_svi <- readRDS(here("cleaned_STATCOM_data_SVI.rds"))
zip_code_summary <- tgcp_svi[!is.na(tgcp_svi$ClientZipCode), ] %>% group_by(ClientZipCode) %>% summarise(num_clients = n(), prop_clients = n() / nrow(tgcp_svi))
zip_code_num_clients <- data.frame(region = as.character(zip_code_summary$ClientZipCode),
value = zip_code_summary$num_clients)
atleast5_zip <- zip_code_num_clients$region[zip_code_num_clients$value > 4 &
!is.na(zip_code_num_clients$region)]
# Yinqiao's function to highlight a particular county
highlight_county <- function(county_fips)
{
data(county.map, package="choroplethrMaps", envir=environment())
df = county.map[county.map$region %in% county_fips, ]
geom_polygon(data=df, aes(long, lat, group = group), color = "Darkgreen", fill = NA, size = 1)
}
First, we map all the client zip codes in North Carolina.
For the zip codes that are not in North Carolina, one zip code is from Virginia (with one client), one zip code is from West Virginia (with one client) and the rest are typos.
zip_choropleth(zip_code_num_clients,
state_zoom = c("north carolina"),
title = "Number of Clients",
legend = "Number of Clients",
num_colors = 9)
## Warning in super$initialize(zip.map, user.df): Your data.frame contains the
## following regions which are not mappable: 26511, 26604, 26715, 27060, 27454,
## 27515, 27528, 27547, 27593, 27619, 27629, 27676, 27690, 27697, 27699, 27710
## Warning in self$bind(): The following regions were missing and are being set to
## NA: 28734, 27855, 27812, 27209, 27938, 28610, 28704, 28778, 28227, 27028, 28371,
## 28526, 28134, 27936, 28411, 27544, 27316, 27455, 28373, 28213, 27350, 28670,
## 28080, 28455, 27830, 28708, 27229, 28383, 27953, 27853, 28127, 28539, 27207,
## 28366, 28617, 28091, 28032, 27849, 28635, 28137, 28660, 27288, 28584, 27258,
## 27406, 27101, 28311, 28438, 27556, 28104, 28676, 27986, 27242, 28555, 28369,
## 27947, 28516, 27013, 28356, 27935, 28649, 28740, 27041, 28672, 27929, 28480,
## 28623, 27932, 27127, 28097, 28804, 28398, 28662, 28393, 28636, 28452, 28363,
## 28039, 28508, 28537, 28469, 28690, 28076, 28083, 28619, 28801, 28501, 27804,
## 28658, 27922, 27360, 28456, 28629, 28205, 28052, 27948, 28023, 27409, 27377,
## 28613, 28761, 27884, 27820, 27325, 28202, 27212, 28785, 28462, 28528, 27306,
## 27568, 27886, 28441, 28554, 27873, 27281, 27282, 27541, 27262, 27807, 27263,
## 27553, 27559, 27814, 27816, 27291, 27817, 27818, 27821, 27572, 27310, 27299,
## 27301, 27563, 27305, 27314, 27355, 27326, 27006, 27009, 27011, 27012, 27349,
## 27341, 27343, 27370, 27022, 27023, 27024, 27045, 27047, 27048, 27379, 27051,
## 27805, 27054, 27403, 27574, 28379, 28504, 27410, 27522, 28711, 27203, 27214,
## 27265, 28159, 27231, 28043, 28016, 27976, 27979, 28374, 27243, 27248, 28756,
## 27252, 28012, 28019, 28673, 28020, 28034, 28678, 28679, 28385, 28040, 28386,
## 28027, 28031, 28033, 28665, 28668, 28072, 28421, 28054, 28075, 28701, 28702,
## 28392, 28423, 28396, 28707, 28709, 28078, 28405, 28692, 28409, 28071, 28720,
## 28718, 28103, 28722, 28430, 28725, 28726, 28729, 28731, 28105, 28431, 28107,
## 28434, 28439, 28442, 28086, 28089, 28715, 28449, 28747, 28133, 28460, 28138,
## 28465, 28754, 28757, 28117, 28112, 28119, 28735, 28736, 28120, 28468, 28124,
## 28125, 28741, 28749, 28164, 28166, 28169, 28510, 28512, 28144, 28773, 27371,
## 28774, 28775, 28762, 27320, 28763, 28766, 28135, 28025, 28901, 27298, 28904,
## 28783, 28214, 27837, 27840, 28544, 28909, 27842, 27843, 28546, 28789, 28174,
## 28523, 28524, 28552, 28527, 28529, 28803, 27825, 28557, 28532, 27826, 28209,
## 28538, 27829, 28210, 28211, 28212, 28805, 28301, 28303, 27862, 28305, 27863,
## 27864, 27869, 28217, 28226, 27872, 28310, 28570, 28571, 27847, 27878, 28572,
## 28574, 28269, 28589, 27888, 28590, 28594, 27890, 27846, 28582, 28585, 28320,
## 28618, 27923, 28340, 28333, 27926, 28343, 28622, 28624, 27937, 28630, 28351,
## 28654, 28357, 28638, 28642, 27950, 28644, 28360, 27957, 27896, 28349, 27965,
## 27966, 27970, 28626, 27973, 28627, 27974, 28650, 28663, 27534, 27042, 28160,
## 28646, 28344, 27505, 28318, 28395, 28079, 28581, 28806, 28684, 28520, 28721,
## 28556, 28752, 28645, 27856, 28681, 28018, 28781, 28436, 27828, 28206, 27407,
## 28216, 28347, 28304, 27358, 28685, 28403, 28443, 27107, 28278, 27980, 27949,
## 27014, 27851, 28262, 28634, 27208, 28669, 28733, 27542, 27016, 28640, 28390,
## 28573, 27106, 27583, 27845, 28115, 27019, 27283, 27581, 27920, 28204, 28081,
## 27235, 28208, 27050, 27897, 28746, 28207, 28372, 27018, 27917, 28073, 27876,
## 28429, 27958, 28470, 28605, 28730, 28578, 28277, 27875, 28677, 27941, 28748,
## 27021, 28902, 28173, 28147, 28683, 27027, 27020, 28387, 28129, 28463, 27806,
## 28759, 28453, 28461, 27356, 28525, 28547, 28905, 27865, 28215, 28273, 27832,
## 27313, 27871, 27844, 28579, 28583, 28586, 27357, 27007, 28464, 27915, 28609,
## 27046, 28323, 27927, 28606, 27942, 28345, 28348, 27103, 27408, 27954, 27503,
## 27968, 27972, 27405, 28671, 27110, 28001, 28365, 27531, 28384, 28666, 28553,
## 28037, 28682, 28689, 27808, 28693, 28168, 27315, 27924, 27311, 28377, 27105,
## 28732, 27981, 27880, 27233, 28467, 28098, 28698, 28399, 28450, 28454, 27889,
## 27939, 28428, 28772, 28521, 28056, 28432, 28163, 28167, 28270, 28751, 27946,
## 28341, 28631, 28791, 27870, 28580, 28587, 28314, 27919, 28332, 27921, 27928,
## 28352, 28006, 27962, 28128, 27376, 28391, 27824, 27813, 27822, 27030, 27295,
## 27565, 27589, 27249, 27017, 27332, 27340, 27025, 27709, 27109, 27201, 27239,
## 27244, 27530, 27982, 28007, 28017, 28364, 28367, 28375, 28655, 28657, 28659,
## 28694, 28697, 28705, 28422, 28424, 28712, 28713, 28714, 28716, 28717, 28090,
## 28092, 28101, 28102, 28433, 28445, 28447, 28114, 28739, 28742, 28753, 28139,
## 28146, 28478, 28511, 28771, 28779, 28152, 28519, 28792, 28543, 27850, 27852,
## 27866, 28577, 27874, 27881, 27882, 28282, 28307, 27891, 27892, 28326, 28327,
## 28604, 28607, 27909, 27910, 27916, 28612, 28616, 27925, 27943, 27944, 28338,
## 28625, 28637, 27956, 27967, 28358, 28647, 28651, 28652, 28513, 27823, 28530,
## 28337, 28777, 28667, 28675, 28420, 28350, 28602, 27960, 27978, 28531, 28601,
## 28109, 28786, 27827, 28472, 28723, 27562, 27253, 28451, 28787, 27292, 27052,
## 28308, 28342, 27841, 28551, 27819, 28444, 28448, 28110, 28458, 27284, 28745,
## 28479, 28782, 28150, 28533, 28790, 28540, 27317, 27596, 28575, 28280, 28312,
## 28330, 28621, 28628, 28643, 27053, 27055, 28315, 28021, 28077, 27964, 27861,
## 28768, 27883, 27043, 27205, 28108, 28382, 28425, 28088, 27555, 28394, 28719,
## 27839, 27983, 28009, 28466, 28435, 27831, 28906, 28244, 27302, 28611, 27879,
## 28325, 28457, 28170, 28743, 27551, 28515, 28560, 28615, 27985, 27959
data(zip.regions)
zip.regions.tgcp <- zip.regions[zip.regions$region %in% atleast5_zip, ]
tgcp_county_name_code <- unique(cbind(zip.regions.tgcp$county.name, zip.regions.tgcp$county.fips.numeric))
harnett_zip <- zip.regions$region[zip.regions$county.name == "harnett"]
johnston_zip <- zip.regions$region[zip.regions$county.name == "johnston"]
chatham_zip <- zip.regions$region[zip.regions$county.name == "chatham"]
wake_zip <- zip.regions$region[zip.regions$county.name == "wake"]
durham_zip <- zip.regions$region[zip.regions$county.name == "durham"]
orange_zip <- zip.regions$region[zip.regions$county.name == "orange"]
franklin_zip <- zip.regions$region[zip.regions$county.name == "franklin"]
nash_zip <- zip.regions$region[zip.regions$county.name == "nash"]
granville_zip <- zip.regions$region[zip.regions$county.name == "granville"]
intersect(atleast5_zip, harnett_zip) # contained within other counties
intersect(atleast5_zip, johnston_zip)
intersect(atleast5_zip, chatham_zip) # contained within other counties
intersect(atleast5_zip, wake_zip)
intersect(atleast5_zip, durham_zip)
intersect(atleast5_zip, orange_zip)
intersect(atleast5_zip, franklin_zip)
intersect(atleast5_zip, nash_zip) # contained within franklin
intersect(atleast5_zip, granville_zip) # contained within franklin
intersect(atleast5_zip, johnston_zip)
intersect(atleast5_zip, wake_zip)
intersect(atleast5_zip, durham_zip)
intersect(atleast5_zip, orange_zip)
intersect(atleast5_zip, franklin_zip)
Five counties contain all the zip codes with at least 5 client households: Orange, Durham, Franklin, Wake, and Johnston. Most of the zip codes lie within Wake county.
usmap::plot_usmap("counties", fill = "yellow", alpha = 0.25,
include = c("37101", "37183", "37063", "37135", "37069"),
labels = T) +
labs(title = "Counties Serviced by TGCP")
Below is a map of the zip codes in Wake County, taken from https://www.cccarto.com/nc/wake_zipcodes/.
First, we plot the number of household units and the need as represented by lower Per Capita Income (PCI). These two variables are taken from the CDC Social Vulnerability Index.
# tgcp_atleast5 <- tgcp_svi[tgcp_svi$ClientZipCode %in% atleast5_zip, ]
tgcp_pci_hh <- unique(data.frame(ClientZipCode = tgcp_svi$ClientZipCode[!is.na(tgcp_svi$ClientZipCode)],
EP_PCI = tgcp_svi$EP_PCI[!is.na(tgcp_svi$ClientZipCode)],
E_HH = tgcp_svi$E_HH[!is.na(tgcp_svi$ClientZipCode)]))
zip_code_pci <- data.frame(region = as.character(tgcp_pci_hh[, 1]), value = tgcp_pci_hh[, 2])
tgcp_pci_hh_subset <- tgcp_pci_hh[tgcp_pci_hh[, 1] %in% atleast5_zip, ]
# reversing the ranking based on PCI, by taking negative
score <- rank(-tgcp_pci_hh_subset[, 2], ties.method = "average", na.last = "keep")
# score <- score / max(score, na.rm = T)
zip_code_rank_pci <- data.frame(region = as.character(tgcp_pci_hh_subset[, 1]),
value = score)
zip_code_hh <- data.frame(region = as.character(tgcp_pci_hh[, 1]), value = tgcp_pci_hh[, 3])
Below shows the number of household units in each zip code that contains at least 5 TGCP client households. Wake County is outlined in green.
zip_choropleth(zip_code_hh,
zip_zoom = atleast5_zip,
title = "Number of Household Units",
legend = "Number of Household Units",
num_colors = 9) +
highlight_county(37183)
## Warning in super$initialize(zip.map, user.df): Your data.frame contains the
## following regions which are not mappable: 27593, 27697, 27619, 26715, 26604,
## 27699, 27454, 27690, 27629, 26511, 27547, 27515, 27528, 27676, 27060, 27710
Below is the same map, overlaid over Google Maps.
zip_choropleth(zip_code_hh,
zip_zoom = atleast5_zip,
title = "Number of Household Units",
legend = "Number of Household Units",
num_colors = 9,
reference_map = T) +
highlight_county(37183)
## Warning in super$initialize(zip.map, user.df): Your data.frame contains the
## following regions which are not mappable: 27593, 27697, 27619, 26715, 26604,
## 27699, 27454, 27690, 27629, 26511, 27547, 27515, 27528, 27676, 27060, 27710
## Source : https://maps.googleapis.com/maps/api/staticmap?center=35.787768,-78.642962&zoom=9&size=640x640&scale=2&maptype=terrain&language=en-EN&key=xxx
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Warning: Removed 1 rows containing missing values (geom_rect).
Below shows the Per Capita Income of each zip code.
zip_choropleth(zip_code_pci,
zip_zoom = atleast5_zip,
title = "Per Capita Income",
legend = "Per Capita Income",
num_colors = 9) +
highlight_county(37183)
## Warning in super$initialize(zip.map, user.df): Your data.frame contains the
## following regions which are not mappable: 27593, 27697, 27619, 26715, 26604,
## 27699, 27454, 27690, 27629, 26511, 27547, 27515, 27528, 27676, 27060, 27710
Below ranks the zip code according to PCI. That is, the rank of 1 is assigned to the highest PCI, and the last rank of 44 is assigned to the lowest PCI. Thus, a higher rank corresponds to higher need.
zip_choropleth(zip_code_rank_pci,
zip_zoom = atleast5_zip,
title = "Ranking of Need (corresponding to lower PCI)",
legend = "Ranking",
num_colors = 9) +
highlight_county(37183)
zip_code_summary_hh <- left_join(zip_code_summary, tgcp_pci_hh, by = "ClientZipCode")
zip_code_summary_hh$prop_hh_served <- zip_code_summary_hh$num_clients / zip_code_summary_hh$E_HH
zip_code_prop_hh <- data.frame(region = as.character(zip_code_summary_hh$ClientZipCode),
value = zip_code_summary_hh$prop_hh_served)
zip_code_summary_subset <- zip_code_summary_hh[zip_code_summary_hh$ClientZipCode %in% atleast5_zip, ]
score <- rank(zip_code_summary_subset$prop_hh_served, ties.method = "average")
# score <- score / max(score)
zip_code_rank_num_clients <- data.frame(region = as.character(zip_code_summary_subset$ClientZipCode),
value = score)
The below map shows the number of client households served by TGCP at each zip code.
zip_choropleth(zip_code_num_clients,
zip_zoom = atleast5_zip,
title = "Number of Clients",
legend = "Number of Clients",
num_colors = 9) +
highlight_county(37183)
## Warning in super$initialize(zip.map, user.df): Your data.frame contains the
## following regions which are not mappable: 26511, 26604, 26715, 27060, 27454,
## 27515, 27528, 27547, 27593, 27619, 27629, 27676, 27690, 27697, 27699, 27710
Below is the same map, overlaid over Google Maps.
zip_choropleth(zip_code_num_clients,
zip_zoom = atleast5_zip,
title = "Number of Clients",
legend = "Number of Clients",
num_colors = 9,
reference_map = T) +
highlight_county(37183)
## Warning in super$initialize(zip.map, user.df): Your data.frame contains the
## following regions which are not mappable: 26511, 26604, 26715, 27060, 27454,
## 27515, 27528, 27547, 27593, 27619, 27629, 27676, 27690, 27697, 27699, 27710
## Source : https://maps.googleapis.com/maps/api/staticmap?center=35.787768,-78.642962&zoom=9&size=640x640&scale=2&maptype=terrain&language=en-EN&key=xxx
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Warning: Removed 1 rows containing missing values (geom_rect).
zip_choropleth(zip_code_prop_hh,
zip_zoom = atleast5_zip,
title = "Proportion of Households Served",
legend = "Proportion",
num_colors = 9) +
highlight_county(37183)
## Warning in super$initialize(zip.map, user.df): Your data.frame contains the
## following regions which are not mappable: 26511, 26604, 26715, 27060, 27454,
## 27515, 27528, 27547, 27593, 27619, 27629, 27676, 27690, 27697, 27699, 27710
Below ranks the zip code according to the proportion of households served. That is, the rank of 1 corresponds to the lowest proportion of households served, and the last rank of 44 corresponds to the highest proportion of households served. Thus, a higher rank corresponds to more service, or “needs met”.
zip_choropleth(zip_code_rank_num_clients,
zip_zoom = atleast5_zip,
title = "Ranking of Proportion of Households Served",
legend = "Rank",
num_colors = 9) +
highlight_county(37183)
zip_code_rank_num_pci <- left_join(zip_code_rank_num_clients, zip_code_rank_pci, by = "region")
zip_code_rank_diff <- data.frame(region = zip_code_rank_num_pci$region, value = zip_code_rank_num_pci$value.y - zip_code_rank_num_pci$value.x)
# zip_code_rank_diff_rank <- data.frame(region = zip_code_rank_num_pci$region, value = rank(zip_code_rank_diff$value))
To measure “unmet need,” I take the difference of the PCI ranking (representing the “need”) and the proportion of households served ranking (representing the “met need”).
zip_choropleth(zip_code_rank_diff,
zip_zoom = atleast5_zip,
title = "Unmet Need",
legend = "Rank Difference",
num_colors = 9) +
highlight_county(37183)
Below is the same map, overlaid over Google Maps.
zip_choropleth(zip_code_rank_diff,
zip_zoom = atleast5_zip,
title = "Unmet Need",
legend = "Rank Difference",
num_colors = 9,
reference_map = T) +
highlight_county(37183)
## Source : https://maps.googleapis.com/maps/api/staticmap?center=35.787768,-78.642962&zoom=9&size=640x640&scale=2&maptype=terrain&language=en-EN&key=xxx
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Warning: Removed 1 rows containing missing values (geom_rect).
As shown in the two plots above, the zip codes with the most “unmet need” tend to be on the periphery of Wake County.